*-------------------------------------------------------------------------------
*-------------------------------------------------------------------------------
* Start from an empty dataset and switch off output paging.
clear
set more off

* $rootfolder is not assigned in this file (the line below is commented out),
* so it has to be defined by the caller before this script is run.
*global rootfolder ""
global concordancefolder "$rootfolder\data\0_Concordances&OtherAuxiliaryFiles"
* -capture- suppresses any mkdir error (e.g. the directory already exists).
* After the -cd-, every relative -save-/-use- below resolves inside \output.
capture mkdir "$concordancefolder\output"
cd "$concordancefolder\output"

*-------------------------------------------------------------------------------
*0.1: Diverse Auxiliary Files
*-------------------------------------------------------------------------------

*-------------------------------------------------------------------------------
*0.1.1: Helpfile for Eurostat Country Codes
*-------------------------------------------------------------------------------
*note: Eurostat + changes found on
* http://ec.europa.eu/eurostat/ramon/other_documents/geonom/#Top_of_page

***insheet numbering file (columns are addressed by insheet's default names v1, v2, ...):
clear
insheet using "$concordancefolder\Eu_part_iso-Eu_Partners.txt", tab

***edit: keep the ISO code, the numeric Eurostat code and the two validity dates
keep v1 v3 v4 v5
rename v1 isocode
rename v3 num_eurostat
rename v4 date_begin
rename v5 date_end
* -force- turns non-numeric codes into missing; those rows are dropped
destring num_eurostat, replace force
drop if num_eurostat == .

***add the aggregated EU data:
*   EU = 1010, EU15 = 1110, EU25 = 1111, EU27 = 1112, EXT-EU = 1011
* The two lists are positional: word k of agg_codes belongs to word k of agg_nums.
local agg_codes EU EU15 EU25 EU27 EXT-EU
local agg_nums  1010 1110 1111 1112 1011
local n_agg : word count `agg_codes'
local n_old = _N
local n_new = `n_old' + `n_agg'
set obs `n_new'
forvalues k = 1/`n_agg' {
	local row = `n_old' + `k'
	local code : word `k' of `agg_codes'
	local num  : word `k' of `agg_nums'
	replace isocode      = "`code'"     in `row'
	replace num_eurostat = `num'        in `row'
	replace date_begin   = "01/01/1976" in `row'
	replace date_end     = "31/12/2500" in `row'
}

***one row per code and year of validity:
* dates are dd/mm/yyyy strings, so the year sits in characters 7-10
gen year_begin = substr(date_begin,7,4)
gen year_end = substr(date_end,7,4)
* the open-ended end year 2500 is capped at 2030 to keep the expansion small
replace year_end = "2030" if year_end == "2500"
destring year_end year_begin, replace

* row_id identifies the source row, so that exact duplicate rows are each
* expanded over their own year range instead of sharing one running counter
gen long row_id = _n
gen n = year_end - year_begin + 1
expand n
bysort isocode num_eurostat year_begin year_end row_id: gen year = year_begin + _n - 1

drop date_* year_* n row_id

***save:
compress
save 0.1.1_eurostat_numbering_helpfile.dta, replace

*note: this table uses GB, not UK

**
** concordance name
**
* Semicolon-delimited file; the first row supplies the variable names.
insheet using "$concordancefolder\country_names.csv", delimiter(";") names clear
compress
save 0.0_concordances_names.dta, replace

*-------------------------------------------------------------------------------
*0.2: Concordance NAICS 2002 to NACE rev. 1.1
*-------------------------------------------------------------------------------

*-------------------------------------------------------------------------------
*0.2.1: insheet and edit
*-------------------------------------------------------------------------------

***insheet (attention: no names in the file, so the columns are v1, v2, ...):
insheet using "$concordancefolder\NAICS2002-to-NACE1.1_USCensusBureau.txt", comma clear

***edit: column 2 is the NACE code, column 4 the NAICS code
keep v2 v4
rename v4 naics_2002
rename v2 nace_r1_1
* NAICS is stored as a string (later sections shorten it with substr());
* rows whose NAICS code is "0" are discarded
tostring naics_2002, replace
drop if naics_2002 == "0"

***save as helpfile:
sort naics_2002
compress
save 0.2.1_concordances_naics-2002_to_nace-r1-1_helpfile.dta, replace

*-------------------------------------------------------------------------------------------------
*0.2.2: specify 6-digit NAICS to 4-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.1_concordances_naics-2002_to_nace-r1-1_helpfile.dta, clear

***generate factor-variable:
* factor = 1 / (number of rows that share the same naics_2002), i.e. every
* NACE code linked to a NAICS code receives an equal share.
bysort naics_2002: generate factor = 1 / _N

*-----------------------------------------------------
sort naics_2002
compress
save 0.2.2_concordances_naics-2002-6digit_to_nace-r1-1-4digit.dta, replace
*-----------------------------------------------------

*-------------------------------------------------------------------------------------------------
*0.2.3: specify 5-digit NAICS to 4-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.1_concordances_naics-2002_to_nace-r1-1_helpfile.dta, clear

***replace naics: shorten to the first 5 characters and keep one row per
***(NAICS, NACE) pair
replace naics_2002 = substr(naics_2002,1,5)
bysort naics_2002 nace_r1_1: keep if _n == 1

***generate factor-variable:
* factor = 1 / (number of distinct NACE codes linked to the NAICS code)
bysort naics_2002: generate factor = 1 / _N

*-----------------------------------------------------
sort naics_2002
compress
save 0.2.3_concordances_naics-2002-5digit_to_nace-r1-1-4digit.dta, replace
*-----------------------------------------------------

*-------------------------------------------------------------------------------------------------
*0.2.4: specify 4-digit NAICS to 4-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.1_concordances_naics-2002_to_nace-r1-1_helpfile.dta, clear

***replace naics: shorten to the first 4 characters and keep one row per
***(NAICS, NACE) pair
replace naics_2002 = substr(naics_2002,1,4)
bysort naics_2002 nace_r1_1: keep if _n == 1

***generate factor-variable:
* factor = 1 / (number of distinct NACE codes linked to the NAICS code)
bysort naics_2002: generate factor = 1 / _N

*-----------------------------------------------------
sort naics_2002
compress
save 0.2.4_concordances_naics-2002-4digit_to_nace-r1-1-4digit.dta, replace
*-----------------------------------------------------

*-------------------------------------------------------------------------------------------------
*0.2.5: specify 4-digit NAICS to 3-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.1_concordances_naics-2002_to_nace-r1-1_helpfile.dta, clear

***replace naics and nace:
replace naics_2002 = substr(naics_2002,1,4)
replace nace_r1_1 = substr(nace_r1_1,1,4)		// 4 characters: 3 digits plus the dot in the code
* keep one row per (NAICS, NACE) pair
bysort naics_2002 nace_r1_1: keep if _n == 1

***generate factor-variable:
* factor = 1 / (number of distinct NACE codes linked to the NAICS code)
bysort naics_2002: generate factor = 1 / _N

*-----------------------------------------------------
sort naics_2002
compress
save 0.2.5_concordances_naics-2002-4digit_to_nace-r1-1-3digit.dta, replace
*-----------------------------------------------------

*-------------------------------------------------------------------------------------------------
*0.2.6: specify 4-digit NAICS to 2-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.1_concordances_naics-2002_to_nace-r1-1_helpfile.dta, clear

***replace naics and nace:
replace naics_2002 = substr(naics_2002,1,4)
replace nace_r1_1 = substr(nace_r1_1,1,3)		// 3 characters: 2 digits plus the dot in the code
* keep one row per (NAICS, NACE) pair
bysort naics_2002 nace_r1_1: keep if _n == 1

***generate factor-variable:
* factor = 1 / (number of distinct NACE codes linked to the NAICS code)
bysort naics_2002: generate factor = 1 / _N

*-----------------------------------------------------
sort naics_2002
compress
save 0.2.6_concordances_naics-2002-4digit_to_nace-r1-1-2digit.dta, replace
*-----------------------------------------------------

*-------------------------------------------------------------------------------------------------
*0.2.7: specify 3-digit NAICS to 3-digit NACE version
*-------------------------------------------------------------------------------------------------
use 0.2.1_concordances_naics-2002_to_nace-r1-1_helpfile.dta, clear

***replace naics and nace:
replace naics_2002 = substr(naics_2002,1,3)
replace nace_r1_1 = substr(nace_r1_1,1,4)		// 4 characters: 3 digits plus the dot in the code
* keep one row per (NAICS, NACE) pair
bysort naics_2002 nace_r1_1: keep if _n == 1

***generate factor-variable:
* factor = 1 / (number of distinct NACE codes linked to the NAICS code)
bysort naics_2002: generate factor = 1 / _N

*-----------------------------------------------------
sort naics_2002
compress
save 0.2.7_concordances_naics-2002-3digit_to_nace-r1-1-3digit.dta, replace
*-----------------------------------------------------

*-------------------------------------------------------------------------------------------------
*0.2.8: specify 3-digit NAICS to 2-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.1_concordances_naics-2002_to_nace-r1-1_helpfile.dta, clear

***replace naics and nace:
replace naics_2002 = substr(naics_2002,1,3)
replace nace_r1_1 = substr(nace_r1_1,1,3)		// 3 characters: 2 digits plus the dot in the code
* keep one row per (NAICS, NACE) pair
bysort naics_2002 nace_r1_1: keep if _n == 1

***generate factor-variable:
* factor = 1 / (number of distinct NACE codes linked to the NAICS code)
bysort naics_2002: generate factor = 1 / _N

*-----------------------------------------------------
sort naics_2002
compress
save 0.2.8_concordances_naics-2002-3digit_to_nace-r1-1-2digit.dta, replace
*-----------------------------------------------------

*-------------------------------------------------------------------------------------------------
*0.2.9: specify 2-digit NAICS to 2-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.1_concordances_naics-2002_to_nace-r1-1_helpfile.dta, clear

***replace naics and nace:
replace naics_2002 = substr(naics_2002,1,2)
replace nace_r1_1 = substr(nace_r1_1,1,3)		// 3 characters: 2 digits plus the dot in the code
* keep one row per (NAICS, NACE) pair
bysort naics_2002 nace_r1_1: keep if _n == 1

***generate factor-variable:
* factor = 1 / (number of distinct NACE codes linked to the NAICS code)
bysort naics_2002: generate factor = 1 / _N

*-----------------------------------------------------
sort naics_2002
compress
save 0.2.9_concordances_naics-2002-2digit_to_nace-r1-1-2digit.dta, replace
*-----------------------------------------------------

*-------------------------------------------------------------------------------------------------
*0.2.0.1: insheet and edit NAICS 2012 / NAICS 2007 concordances (chained via NAICS 2002 to NACE rev. 1.1)
*-------------------------------------------------------------------------------------------------

* NAICS 2007 -> NAICS 2002: load the semicolon-delimited concordance and keep
* a .dta copy of it as read.
insheet using "$concordancefolder\concordance_naics2007_to_naics2002_USCensusBureau.csv", clear delim(";")
compress
save tmp_naics_07_to_02_6d.dta, replace
* floor(x/100) removes the last two digits of each code
* (6-digit -> 4-digit, assuming 6-digit input codes - TODO confirm).
gen naics_2007_4d = floor(naics_2007/100)
gen naics_2002_4d = floor(naics_2002/100)
* collapse followed by dropping the count leaves one row per distinct
* (naics_2007_4d, naics_2002_4d) pair.
gen counter = 1
collapse (sum) counter, by(naics_2007_4d naics_2002_4d)
drop counter
* Diagnostics only: diff and the dupl_* flags are summarized and then dropped,
* so none of them reaches the saved file. dupl_* is 1 on every row after the
* first one within a code.
gen diff = naics_2007_4d - naics_2002_4d
bysort naics_2007_4d: gen dupl_07 = _n != 1
bysort naics_2002_4d: gen dupl_02 = _n != 1
sort naics_2007_4d naics_2002_4d
sum dupl_*
drop dupl_* diff
compress
save 0.2.0.1_naics_07_to_02_4d.dta, replace

* NAICS 2012 -> NAICS 2007: the codes are taken from workbook columns A and C.
import excel using "$concordancefolder\2012_to_2007_NAICS.xls", clear
rename A naics_2012
rename C naics_2007
* The first three rows are removed (presumably title/header rows - TODO confirm
* against the workbook).
drop in 1/3
keep naics*
compress
* Columns imported as text are converted to numbers so floor() can be applied.
destring naics_*, replace
compress
save naics_12_to_07_6d.dta, replace
* floor(x/100) removes the last two digits of each code.
gen naics_2012_4d = floor(naics_2012/100)
gen naics_2007_4d = floor(naics_2007/100)
* One row per distinct (naics_2012_4d, naics_2007_4d) pair.
gen counter = 1
collapse (sum) counter, by(naics_2012_4d naics_2007_4d)
drop counter
* Diagnostics only: summarized, then dropped before saving.
gen diff = naics_2012_4d - naics_2007_4d
bysort naics_2012_4d: gen dupl_12 = _n != 1
bysort naics_2007_4d: gen dupl_07 = _n != 1
sum dupl_*
drop dupl_* diff
compress
save naics_12_to_07_4d.dta, replace

* NAICS 2002 -> NACE rev. 1.1 again, this time with a numeric NAICS code.
* The load and edit steps repeat those of section 0.2.1.
clear
insheet using "$concordancefolder\NAICS2002-to-NACE1.1_USCensusBureau.txt", comma	// attention: no names
***edit:
keep v2 v4
rename v2 nace_r1_1
rename v4 naics_2002
tostring naics_2002, replace
drop if naics_2002 == "0"
***save as helpfile:
sort naics_2002
* "naics" is an abbreviation of naics_2002; the code goes back to numeric so
* that floor() can be applied below.
destring naics, replace
compress
save naics_02_nace_r1_1_6d.dta, replace
* floor(x/100) removes the last two digits of the NAICS code.
gen naics_2002_4d = floor(naics_2002/100)
* One row per distinct (nace_r1_1, naics_2002_4d) pair; "nace" in by() is an
* abbreviation of nace_r1_1.
gen counter =1
collapse (sum) counter, by(nace naics_2002_4d)
drop counter
compress
save 0.2.0.1_naics_02_nace_r1_1_4d.dta, replace


* Chain NAICS 2012 -> NAICS 2007 -> NAICS 2002 on the 4-digit 2007 code.
* unm(both) also keeps rows that exist in only one of the two files.
u naics_12_to_07_4d.dta, clear
joinby naics_2007_4d using 0.2.0.1_naics_07_to_02_4d.dta, unm(both)
tabulate _merge
drop _merge
	* Side outputs: NAICS 2012 <-> NAICS 2002 links with string codes, written
	* inside preserve/restore so the main chain continues unchanged afterwards.
	* The 4-digit file is saved without removing repeated pairs; the 3-digit
	* file keeps one row per pair.
	preserve
		keep naics_2012_4d naics_2002_4d
		tostring naics*, replace
		compress
		save naics12_to_naics02_4d.dta, replace
		clear 
		use naics12_to_naics02_4d.dta
		destring naics*, replace
		* floor(x/10) removes the last digit (4-digit -> 3-digit)
		replace naics_2002_4d = floor(naics_2002_4d/10)
		replace naics_2012_4d = floor(naics_2012_4d/10)
		tostring naics*, replace
		bys naics_2002_4d naics_2012_4d: gen todrop = (_n>1)
		drop if todrop
		drop todrop
		rename naics_2002_4d naics_2002_3d
		rename naics_2012_4d naics_2012_3d
		compress
		save naics12_to_naics02_3d.dta, replace
		
	restore
* Attach NACE rev. 1.1 through the 4-digit NAICS 2002 code.
joinby naics_2002_4d using 0.2.0.1_naics_02_nace_r1_1_4d.dta, unm(both)
tabulate _merge
* NOTE(review): only rows found solely in the NACE file (_merge == 2) are
* dropped here, so rows without a NACE match stay in with an empty nace_r1_1.
* The NAICS 2007 chain in section 0.2.0.7 keeps _merge == 3 only - confirm
* that the difference is intended.
drop if _merge == 2
drop _merge
* One row per distinct (naics_2012_4d, nace_r1_1) pair.
gen counter = 1
collapse (sum) counter, by(naics_2012_4d nace_r1_1)
drop counter
rename nace_r1_1 nace_r1_1_4d
*local new = _N + 2
*set obs `new'
compress
save 0.2.0.1_concordances_naics-2012-4digit_to_nace-r1-1-4digit.dta, replace

*-------------------------------------------------------------------------------------------------
*0.2.0.2: specify 4-digit NAICS12 to 3-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.0.1_concordances_naics-2012-4digit_to_nace-r1-1-4digit.dta, clear

* 4 characters of the NACE string = 3 digits plus the dot
gen nace_r1_1_3d = substr(nace_r1_1_4d,1,4)
drop nace_r1_1_4d
* keep one row per (NAICS, NACE) pair; full variable names are used so the
* code does not depend on variable-name abbreviation
bysort naics_2012_4d nace_r1_1_3d: keep if _n == 1

***generate factor-variable:
* The expression inside count() is 0 or 1 and never missing, so this is the
* number of rows per NAICS code (rows with a missing code form their own group).
bysort naics_2012_4d: egen factor = count(naics_2012_4d != .)
replace factor = 1/factor
compress
save 0.2.0.2_concordances_naics-2012-4digit_to_nace-r1-1-3digit.dta, replace

*-------------------------------------------------------------------------------------------------
*0.2.0.3: specify 4-digit NAICS12 to 2-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.0.1_concordances_naics-2012-4digit_to_nace-r1-1-4digit.dta, clear

* 3 characters of the NACE string = 2 digits plus the dot
gen nace_r1_1_2d = substr(nace_r1_1_4d,1,3)
drop nace_r1_1_4d
* keep one row per (NAICS, NACE) pair; full variable names are used so the
* code does not depend on variable-name abbreviation
bysort naics_2012_4d nace_r1_1_2d: keep if _n == 1

***generate factor-variable:
* The expression inside count() is 0 or 1 and never missing, so this is the
* number of rows per NAICS code (rows with a missing code form their own group).
bysort naics_2012_4d: egen factor = count(naics_2012_4d != .)
replace factor = 1/factor
compress
save 0.2.0.3_concordances_naics-2012-4digit_to_nace-r1-1-2digit.dta, replace

*-------------------------------------------------------------------------------------------------
*0.2.0.4: specify 3-digit NAICS12 to 3-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.0.1_concordances_naics-2012-4digit_to_nace-r1-1-4digit.dta, clear

* floor(x/10) removes the last digit of the NAICS code (4-digit -> 3-digit)
replace naics_2012_4d = floor(naics_2012_4d/10)
rename naics_2012_4d naics_2012_3d
* 4 characters of the NACE string = 3 digits plus the dot
gen nace_r1_1_3d = substr(nace_r1_1_4d,1,4)
drop nace_r1_1_4d
* keep one row per (NAICS, NACE) pair; full variable names are used so the
* code does not depend on variable-name abbreviation
bysort naics_2012_3d nace_r1_1_3d: keep if _n == 1

***generate factor-variable:
* The expression inside count() is 0 or 1 and never missing, so this is the
* number of rows per NAICS code (rows with a missing code form their own group).
bysort naics_2012_3d: egen factor = count(naics_2012_3d != .)
replace factor = 1/factor
compress
save 0.2.0.4_concordances_naics-2012-3digit_to_nace-r1-1-3digit.dta, replace

*-------------------------------------------------------------------------------------------------
*0.2.0.5: specify 3-digit NAICS12 to 2-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.0.1_concordances_naics-2012-4digit_to_nace-r1-1-4digit.dta, clear

* floor(x/10) removes the last digit of the NAICS code (4-digit -> 3-digit)
replace naics_2012_4d = floor(naics_2012_4d/10)
rename naics_2012_4d naics_2012_3d
* 3 characters of the NACE string = 2 digits plus the dot
gen nace_r1_1_2d = substr(nace_r1_1_4d,1,3)
drop nace_r1_1_4d
* keep one row per (NAICS, NACE) pair; full variable names are used so the
* code does not depend on variable-name abbreviation
bysort naics_2012_3d nace_r1_1_2d: keep if _n == 1

***generate factor-variable:
* The expression inside count() is 0 or 1 and never missing, so this is the
* number of rows per NAICS code (rows with a missing code form their own group).
bysort naics_2012_3d: egen factor = count(naics_2012_3d != .)
replace factor = 1/factor
compress
save 0.2.0.5_concordances_naics-2012-3digit_to_nace-r1-1-2digit.dta, replace

*-------------------------------------------------------------------------------------------------
*0.2.0.6: specify 2-digit NAICS12 to 2-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.0.1_concordances_naics-2012-4digit_to_nace-r1-1-4digit.dta, clear

* floor(x/100) removes the last two digits of the NAICS code (4-digit -> 2-digit)
replace naics_2012_4d = floor(naics_2012_4d/100)
rename naics_2012_4d naics_2012_2d
* 3 characters of the NACE string = 2 digits plus the dot
gen nace_r1_1_2d = substr(nace_r1_1_4d,1,3)
drop nace_r1_1_4d
* keep one row per (NAICS, NACE) pair; full variable names are used so the
* code does not depend on variable-name abbreviation
bysort naics_2012_2d nace_r1_1_2d: keep if _n == 1

***generate factor-variable:
* The expression inside count() is 0 or 1 and never missing, so this is the
* number of rows per NAICS code (rows with a missing code form their own group).
bysort naics_2012_2d: egen factor = count(naics_2012_2d != .)
replace factor = 1/factor
compress
save 0.2.0.6_concordances_naics-2012-2digit_to_nace-r1-1-2digit.dta, replace


*-------------------------------------------------------------------------------------------------
*0.2.0.7: specify 4-digit NAICS07 to 4-digit NACE version
*-------------------------------------------------------------------------------------------------


use 0.2.0.1_naics_07_to_02_4d.dta, clear
* chain NAICS 2007 -> NAICS 2002 -> NACE rev. 1.1 on the 4-digit NAICS 2002 code
joinby naics_2002_4d using 0.2.0.1_naics_02_nace_r1_1_4d.dta, unm(both)
tabulate _merge
* only pairs present in both files are kept
keep if _merge == 3
drop _merge
* one row per distinct (NAICS 2007, NACE) pair; counter holds the number of
* joined rows behind each pair and is only summarized before being dropped
gen counter = 1
collapse (sum) counter, by(naics_2007_4d nace_r1_1)
sum counter
drop counter
rename nace_r1_1 nace_r1_1_4d

***generate factor-variable:
* The expression inside count() is 0 or 1 and never missing, so this is the
* number of rows per NAICS code. The full variable name is used so the code
* does not depend on variable-name abbreviation.
bysort naics_2007_4d: egen factor = count(naics_2007_4d != .)
replace factor = 1/factor
rename naics_2007_4d naics_2007
compress
save 0.2.0.7_concordances_naics-2007-4digit_to_nace-r1-1-4digit.dta, replace

*-------------------------------------------------------------------------------------------------
*0.2.0.8: specify 4-digit NAICS07 to 3-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.0.7_concordances_naics-2007-4digit_to_nace-r1-1-4digit.dta, clear
* the 4-digit factor no longer applies once NACE is shortened
drop factor
* 4 characters of the NACE string = 3 digits plus the dot
gen nace_r1_1_3d = substr(nace_r1_1_4d,1,4)
drop nace_r1_1_4d
* keep one row per (NAICS, NACE) pair; full variable names are used so the
* code does not depend on variable-name abbreviation
bysort naics_2007 nace_r1_1_3d: keep if _n == 1

***generate factor-variable:
* The expression inside count() is 0 or 1 and never missing, so this is the
* number of rows per NAICS code.
bysort naics_2007: egen factor = count(naics_2007 != .)
replace factor = 1/factor
compress
save 0.2.0.8_concordances_naics-2007-4digit_to_nace-r1-1-3digit.dta, replace

*-------------------------------------------------------------------------------------------------
*0.2.0.9: specify 4-digit NAICS07 to 2-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.0.7_concordances_naics-2007-4digit_to_nace-r1-1-4digit.dta, clear
* the 4-digit factor no longer applies once NACE is shortened
drop factor
* 3 characters of the NACE string = 2 digits plus the dot
gen nace_r1_1_2d = substr(nace_r1_1_4d,1,3)
drop nace_r1_1_4d
* keep one row per (NAICS, NACE) pair; full variable names are used so the
* code does not depend on variable-name abbreviation
bysort naics_2007 nace_r1_1_2d: keep if _n == 1

***generate factor-variable:
* The expression inside count() is 0 or 1 and never missing, so this is the
* number of rows per NAICS code.
bysort naics_2007: egen factor = count(naics_2007 != .)
replace factor = 1/factor
compress
save 0.2.0.9_concordances_naics-2007-4digit_to_nace-r1-1-2digit.dta, replace

*-------------------------------------------------------------------------------------------------
*0.2.0.10: specify 3-digit NAICS07 to 3-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.0.7_concordances_naics-2007-4digit_to_nace-r1-1-4digit.dta, clear
* the 4-digit factor no longer applies once the codes are shortened
drop factor
* floor(x/10) removes the last digit of the NAICS code (4-digit -> 3-digit)
replace naics_2007 = floor(naics_2007/10)
* 4 characters of the NACE string = 3 digits plus the dot
gen nace_r1_1_3d = substr(nace_r1_1_4d,1,4)
drop nace_r1_1_4d
* keep one row per (NAICS, NACE) pair; full variable names are used so the
* code does not depend on variable-name abbreviation
bysort naics_2007 nace_r1_1_3d: keep if _n == 1

***generate factor-variable:
* The expression inside count() is 0 or 1 and never missing, so this is the
* number of rows per NAICS code.
bysort naics_2007: egen factor = count(naics_2007 != .)
replace factor = 1/factor
compress
save 0.2.0.10_concordances_naics-2007-3digit_to_nace-r1-1-3digit.dta, replace

*-------------------------------------------------------------------------------------------------
*0.2.0.11: specify 3-digit NAICS07 to 2-digit NACE version
*-------------------------------------------------------------------------------------------------

use 0.2.0.7_concordances_naics-2007-4digit_to_nace-r1-1-4digit.dta, clear
* the 4-digit factor no longer applies once the codes are shortened
drop factor
* floor(x/10) removes the last digit of the NAICS code (4-digit -> 3-digit)
replace naics_2007 = floor(naics_2007/10)
* 3 characters of the NACE string = 2 digits plus the dot
gen nace_r1_1_2d = substr(nace_r1_1_4d,1,3)
drop nace_r1_1_4d
* keep one row per (NAICS, NACE) pair; full variable names are used so the
* code does not depend on variable-name abbreviation
bysort naics_2007 nace_r1_1_2d: keep if _n == 1

***generate factor-variable:
* The expression inside count() is 0 or 1 and never missing, so this is the
* number of rows per NAICS code.
bysort naics_2007: egen factor = count(naics_2007 != .)
replace factor = 1/factor
compress
* NOTE(review): the file name ends in "nace-r1-1-3digit" although this dataset
* holds 2-digit NACE codes (nace_r1_1_2d). The name is left unchanged because
* other scripts may load it under this name - confirm and rename consistently.
save 0.2.0.11_concordances_naics-2007-3digit_to_nace-r1-1-3digit.dta, replace

*-----------------------------------------------------------------------------------------------------------
*0.3: Concordance CN8 to NACE rev. 1.1
*-----------------------------------------------------------------------------------------------------------
*-------------------------------------------------------------------------------------------------
*0.3.1: insheet and edit
*-------------------------------------------------------------------------------------------------

***insheet (tab-delimited, first row supplies the variable names):
insheet using "$concordancefolder\CN_to_NACE_1.1_fromKDE.txt", tab names clear
	*The data for this concordance table is from KDE

***drop insufficient obs:
* -destring, force- turns non-numeric NACE entries into missing; after
* -tostring- those rows read "." and are dropped.
destring nace_r1_1, replace force
tostring nace_r1_1, replace
drop if nace_r1_1 == "."

***replace to unique form: insert a dot after the second character
* NOTE(review): the numeric round-trip above removes leading zeros, so a code
* written as 0111 in the raw file would end up as "11.1" here, not "01.11".
* Confirm against the raw file whether such codes occur.
replace nace_r1_1 = substr(nace_r1_1,1,2) + "." + substr(nace_r1_1,3,2)

***generate factor-variable:
* factor = 1 / (number of rows that share the same product_nc)
bysort product_nc: generate factor = 1 / _N

*-----------------------------------------------------
sort product_nc
compress
save 0.3.1_concordances_cn8_to_nace-r1-1.dta, replace
*-----------------------------------------------------

*-----------------------------------------------------------------------------------------------------------
*0.4: Concordance NACE rev. 2 to NACE rev. 1.1
*-----------------------------------------------------------------------------------------------------------
*-------------------------------------------------------------------------------------------------
*0.4.1: insheet and edit
*-------------------------------------------------------------------------------------------------

***insheet (comma-delimited, first row supplies the variable names):
insheet using "$concordancefolder\NACE2-to-NACE1.1_Eurostat.txt", comma names clear

***drop insufficient obs: rows whose nace_r2 field is the literal text "source"
drop if nace_r2 == "source"

***replace to unique form:
* codes shorter than 3 characters (the 2-digit cases) get a trailing point
replace nace_r2 = nace_r2 + "." if length(nace_r2) < 3
replace nace_r1_1 = nace_r1_1 + "." if length(nace_r1_1) < 3

***save as helpfile:
compress
save 0.4.1_concordances_nace-r2_to_nace-r1-1_helpfile.dta, replace

*-------------------------------------------------------------------------------------------------
*0.4.2: specify 4-digit to 4-digit version
*-------------------------------------------------------------------------------------------------

use 0.4.1_concordances_nace-r2_to_nace-r1-1_helpfile.dta, clear

***generate factor-variable:
* factor = 1 / (number of rows that share the same nace_r2)
bysort nace_r2: generate factor = 1 / _N

*-----------------------------------------------------
sort nace_r2
compress
save 0.4.2_concordances_nace-r2-4digit_to_nace-r1-1-4digit.dta, replace
*-----------------------------------------------------

*-------------------------------------------------------------------------------------------------
*0.4.3: generate a 3-digit to 3-digit version
*-------------------------------------------------------------------------------------------------

use 0.4.1_concordances_nace-r2_to_nace-r1-1_helpfile.dta, clear

***replace all to 3-digit form (4 characters: 3 digits plus the dot):
replace nace_r2 = substr(nace_r2,1,4)
replace nace_r1_1 = substr(nace_r1_1,1,4)

***drop duplicates: keep one row per (nace_r2, nace_r1_1) pair
bysort nace_r2 nace_r1_1: keep if _n == 1

***generate factor-variable:
* factor = 1 / (number of distinct nace_r1_1 codes linked to the nace_r2 code)
bysort nace_r2: generate factor = 1 / _N

*-----------------------------------------------------
sort nace_r2
compress
save 0.4.3_concordances_nace-r2-3digit_to_nace-r1-1-3digit.dta, replace
*-----------------------------------------------------

*-------------------------------------------------------------------------------------------------
*0.4.4: generate a 2-digit to 2-digit version
*-------------------------------------------------------------------------------------------------

use 0.4.1_concordances_nace-r2_to_nace-r1-1_helpfile.dta, clear

***replace all to 2-digit form (3 characters: 2 digits plus the dot):
replace nace_r2 = substr(nace_r2,1,3)
replace nace_r1_1 = substr(nace_r1_1,1,3)

***drop duplicates: keep one row per (nace_r2, nace_r1_1) pair
bysort nace_r2 nace_r1_1: keep if _n == 1

***generate factor-variable:
* factor = 1 / (number of distinct nace_r1_1 codes linked to the nace_r2 code)
bysort nace_r2: generate factor = 1 / _N

*-----------------------------------------------------
sort nace_r2
compress
save 0.4.4_concordances_nace-r2-2digit_to_nace-r1-1-2digit.dta, replace
*-----------------------------------------------------

*-----------------------------------------------------------------------------------------------------------
*0.6: Concordance SIC 1987 to NACE rev. 1.1
*-----------------------------------------------------------------------------------------------------------

*-------------
* 0.6.1: Generate concordance SIC1987 to NAICS 2002
*--------------

* tab-delimited file, first row supplies the variable names
insheet using "$concordancefolder\1987_SIC_to_2002_NAICS.txt", tab names clear

* only the two code columns are kept; rows without a SIC code are removed
drop v5 v6 sictitle naicstitle
rename naics naics_2002
rename sic sic_1987
drop if sic_1987 ==""

***generate factor variable:
* factor = 1 / (number of rows that share the same sic_1987)
bysort sic_1987: gen factor = 1/_N
compress
save 0.6.1_concordances_sic-1987-4digit_to_naics-2002.dta, replace

*----------------------------------------------------------
* 0.6.2: generate SIC 1987 to NACE rev. 1.1 (chained via NAICS 2002)
*----------------------------------------------------------

use 0.6.1_concordances_sic-1987-4digit_to_naics-2002.dta, clear

* the SIC -> NAICS factor is discarded; a new one is computed after the join
drop factor
* attach every NACE code linked to each NAICS 2002 code (unmatched rows are
* not kept, as no unmatched() option is given)
joinby naics_2002 using 0.2.1_concordances_naics-2002_to_nace-r1-1_helpfile.dta
drop naics_2002

*delete duplicates: one row per (sic_1987, nace_r1_1) pair
bysort sic_1987 nace_r1_1: keep if _n == 1

* factor = 1 / (number of distinct NACE codes linked to the SIC code)
bysort sic_1987: gen factor = 1/_N
compress
save 0.6.2_concordances_sic-1987-4digit_to_nace_r1-1.dta, replace

* Next: move the working directory to the data folder under $rootfolder
cd "$rootfolder\data"
